library(gridExtra)
library(knitr)
library(ggplot2)
library(fastqcr)
# Run the next line once if FastQC is not already installed locally:
# fastqc_install()

### ONLY THIS CHUNK REQUIRES MODIFICATION ###
### Set your directory locations here.

# Full path to the directory holding one .fastq.gz file per sample
fq.dir <- "/home/d669d153/work/phil.dicaeum/fastq"

# Full path to the directory where the FastQC results should be written
qc.dir <- "/home/d669d153/work/phil.dicaeum/qc"

# Run FastQC on the files in fq.dir from within R, using 4 threads.
# This step only has to be run once; comment it out when you are only
# tweaking the downstream visualizations.
fastqc(fq.dir = fq.dir, qc.dir = qc.dir, threads = 4)

# List the contents of the results directory to confirm that FastQC worked
list.files(path = qc.dir)

[1] “D_hypoleucum_1271_fastqc.html”
[2] “D_hypoleucum_1271_fastqc.zip”
[3] “D_hypoleucum_1273_fastqc.html”
[4] “D_hypoleucum_1273_fastqc.zip”
[5] “D_hypoleucum_1275_fastqc.html”
[6] “D_hypoleucum_1275_fastqc.zip”
[7] “D_hypoleucum_14037_fastqc.html”
[8] “D_hypoleucum_14037_fastqc.zip”
[9] “D_hypoleucum_14061_fastqc.html”
[10] “D_hypoleucum_14061_fastqc.zip”
[11] “D_hypoleucum_14065_fastqc.html”
[12] “D_hypoleucum_14065_fastqc.zip”
[13] “D_hypoleucum_14075_fastqc.html”
[14] “D_hypoleucum_14075_fastqc.zip”
[15] “D_hypoleucum_14079_fastqc.html”
[16] “D_hypoleucum_14079_fastqc.zip”
[17] “D_hypoleucum_14120_fastqc.html”
[18] “D_hypoleucum_14120_fastqc.zip”
[19] “D_hypoleucum_14146_fastqc.html”
[20] “D_hypoleucum_14146_fastqc.zip”
[21] “D_hypoleucum_14181_fastqc.html”
[22] “D_hypoleucum_14181_fastqc.zip”
[23] “D_hypoleucum_14225_fastqc.html”
[24] “D_hypoleucum_14225_fastqc.zip”
[25] “D_hypoleucum_17969_fastqc.html”
[26] “D_hypoleucum_17969_fastqc.zip”
[27] “D_hypoleucum_17976_fastqc.html”
[28] “D_hypoleucum_17976_fastqc.zip”
[29] “D_hypoleucum_18070_fastqc.html”
[30] “D_hypoleucum_18070_fastqc.zip”
[31] “D_hypoleucum_18159_fastqc.html”
[32] “D_hypoleucum_18159_fastqc.zip”
[33] “D_hypoleucum_18188_fastqc.html”
[34] “D_hypoleucum_18188_fastqc.zip”
[35] “D_hypoleucum_18191_fastqc.html”
[36] “D_hypoleucum_18191_fastqc.zip”
[37] “D_hypoleucum_18193_fastqc.html”
[38] “D_hypoleucum_18193_fastqc.zip”
[39] “D_hypoleucum_19046_fastqc.html”
[40] “D_hypoleucum_19046_fastqc.zip”
[41] “D_hypoleucum_19066_fastqc.html”
[42] “D_hypoleucum_19066_fastqc.zip”
[43] “D_hypoleucum_19136_fastqc.html”
[44] “D_hypoleucum_19136_fastqc.zip”
[45] “D_hypoleucum_19177_fastqc.html”
[46] “D_hypoleucum_19177_fastqc.zip”
[47] “D_hypoleucum_19178_fastqc.html”
[48] “D_hypoleucum_19178_fastqc.zip”
[49] “D_hypoleucum_1956_fastqc.html”
[50] “D_hypoleucum_1956_fastqc.zip”
[51] “D_hypoleucum_19638_fastqc.html”
[52] “D_hypoleucum_19638_fastqc.zip”
[53] “D_hypoleucum_20193_fastqc.html”
[54] “D_hypoleucum_20193_fastqc.zip”
[55] “D_hypoleucum_20213_fastqc.html”
[56] “D_hypoleucum_20213_fastqc.zip”
[57] “D_hypoleucum_20214_fastqc.html”
[58] “D_hypoleucum_20214_fastqc.zip”
[59] “D_hypoleucum_20218_fastqc.html”
[60] “D_hypoleucum_20218_fastqc.zip”
[61] “D_hypoleucum_2067_fastqc.html”
[62] “D_hypoleucum_2067_fastqc.zip”
[63] “D_hypoleucum_20921_fastqc.html”
[64] “D_hypoleucum_20921_fastqc.zip”
[65] “D_hypoleucum_2208_fastqc.html”
[66] “D_hypoleucum_2208_fastqc.zip”
[67] “D_hypoleucum_2229_fastqc.html”
[68] “D_hypoleucum_2229_fastqc.zip”
[69] “D_hypoleucum_2253_fastqc.html”
[70] “D_hypoleucum_2253_fastqc.zip”
[71] “D_hypoleucum_25622_fastqc.html”
[72] “D_hypoleucum_25622_fastqc.zip”
[73] “D_hypoleucum_25637_fastqc.html”
[74] “D_hypoleucum_25637_fastqc.zip”
[75] “D_hypoleucum_25670_fastqc.html”
[76] “D_hypoleucum_25670_fastqc.zip”
[77] “D_hypoleucum_25672_fastqc.html”
[78] “D_hypoleucum_25672_fastqc.zip”
[79] “D_hypoleucum_25675_fastqc.html”
[80] “D_hypoleucum_25675_fastqc.zip”
[81] “D_hypoleucum_25868_fastqc.html”
[82] “D_hypoleucum_25868_fastqc.zip”
[83] “D_hypoleucum_25880_fastqc.html”
[84] “D_hypoleucum_25880_fastqc.zip”
[85] “D_hypoleucum_25921_fastqc.html”
[86] “D_hypoleucum_25921_fastqc.zip”
[87] “D_hypoleucum_26975_fastqc.html”
[88] “D_hypoleucum_26975_fastqc.zip”
[89] “D_hypoleucum_26984_fastqc.html”
[90] “D_hypoleucum_26984_fastqc.zip”
[91] “D_hypoleucum_27182_fastqc.html”
[92] “D_hypoleucum_27182_fastqc.zip”
[93] “D_hypoleucum_27450_fastqc.html”
[94] “D_hypoleucum_27450_fastqc.zip”
[95] “D_hypoleucum_27454_fastqc.html”
[96] “D_hypoleucum_27454_fastqc.zip”
[97] “D_hypoleucum_27468_fastqc.html”
[98] “D_hypoleucum_27468_fastqc.zip”
[99] “D_hypoleucum_27471_fastqc.html”
[100] “D_hypoleucum_27471_fastqc.zip”
[101] “D_hypoleucum_28273_fastqc.html”
[102] “D_hypoleucum_28273_fastqc.zip”
[103] “D_hypoleucum_28294_fastqc.html”
[104] “D_hypoleucum_28294_fastqc.zip”
[105] “D_hypoleucum_28329_fastqc.html”
[106] “D_hypoleucum_28329_fastqc.zip”
[107] “D_hypoleucum_28361_fastqc.html”
[108] “D_hypoleucum_28361_fastqc.zip”
[109] “D_hypoleucum_28376_fastqc.html”
[110] “D_hypoleucum_28376_fastqc.zip”
[111] “D_hypoleucum_28416_fastqc.html”
[112] “D_hypoleucum_28416_fastqc.zip”
[113] “D_hypoleucum_28584_fastqc.html”
[114] “D_hypoleucum_28584_fastqc.zip”
[115] “D_hypoleucum_28588_fastqc.html”
[116] “D_hypoleucum_28588_fastqc.zip”
[117] “D_hypoleucum_28596_fastqc.html”
[118] “D_hypoleucum_28596_fastqc.zip”
[119] “D_hypoleucum_28599_fastqc.html”
[120] “D_hypoleucum_28599_fastqc.zip”
[121] “D_hypoleucum_28663_fastqc.html”
[122] “D_hypoleucum_28663_fastqc.zip”
[123] “D_hypoleucum_28676_fastqc.html”
[124] “D_hypoleucum_28676_fastqc.zip”
[125] “D_hypoleucum_29945_fastqc.html”
[126] “D_hypoleucum_29945_fastqc.zip”
[127] “D_hypoleucum_29951_fastqc.html”
[128] “D_hypoleucum_29951_fastqc.zip”
[129] “D_hypoleucum_3095_fastqc.html”
[130] “D_hypoleucum_3095_fastqc.zip”
[131] “D_hypoleucum_3158_fastqc.html”
[132] “D_hypoleucum_3158_fastqc.zip”
[133] “D_hypoleucum_31636_fastqc.html”
[134] “D_hypoleucum_31636_fastqc.zip”
[135] “D_hypoleucum_31644_fastqc.html”
[136] “D_hypoleucum_31644_fastqc.zip”
[137] “D_hypoleucum_3208_fastqc.html”
[138] “D_hypoleucum_3208_fastqc.zip”
[139] “D_hypoleucum_3274_fastqc.html”
[140] “D_hypoleucum_3274_fastqc.zip”
[141] “D_hypoleucum_3275_fastqc.html”
[142] “D_hypoleucum_3275_fastqc.zip”
[143] “D_hypoleucum_3314_fastqc.html”
[144] “D_hypoleucum_3314_fastqc.zip”
[145] “D_hypoleucum_357608_fastqc.html”
[146] “D_hypoleucum_357608_fastqc.zip”
[147] “D_hypoleucum_357611_fastqc.html”
[148] “D_hypoleucum_357611_fastqc.zip”
[149] “D_hypoleucum_357612_fastqc.html”
[150] “D_hypoleucum_357612_fastqc.zip”
[151] “D_hypoleucum_357614_fastqc.html”
[152] “D_hypoleucum_357614_fastqc.zip”
[153] “D_hypoleucum_357615_fastqc.html”
[154] “D_hypoleucum_357615_fastqc.zip”
[155] “D_hypoleucum_454950_fastqc.html”
[156] “D_hypoleucum_454950_fastqc.zip”
[157] “D_hypoleucum_462070_fastqc.html”
[158] “D_hypoleucum_462070_fastqc.zip”
[159] “D_hypoleucum_472816_fastqc.html”
[160] “D_hypoleucum_472816_fastqc.zip”
[161] “D_hypoleucum_FMNH454949_fastqc.html”
[162] “D_hypoleucum_FMNH454949_fastqc.zip”
[163] “D_nigrilore_KU28413_fastqc.html”
[164] “D_nigrilore_KU28413_fastqc.zip”
[165] “D_nigrilore_KU28414_fastqc.html”
[166] “D_nigrilore_KU28414_fastqc.zip”
[167] “qc.Rmd”
[168] “run.fastqcr.sh”
[169] “slurm-39045614.out”

# Character vector holding the full path to each .zip archive in qc.dir
# (fastqc() writes one per sample). `pattern` is a regular expression, not a
# shell glob, so the extension is escaped and anchored; the previous "*.zip"
# would also have matched any file name that merely contains "zip".
samps <- list.files(qc.dir, pattern = "\\.zip$", full.names = TRUE)

# Report the QC results for each sample in turn
for (zip.path in samps) {
  # read the QC info for this sample from its .zip archive
  samp.info <- qc_read(zip.path)

  # QC views for this sample; the list is rebuilt on every iteration, and it is
  # deliberately not called `plot` so that base::plot() is never masked
  qc.views <- list(
    qc_plot(samp.info, "Basic statistics"),
    qc_plot(samp.info, "Per sequence quality scores"),
    qc_plot(samp.info, "Sequence duplication levels")
  )

  # header line naming the archive being reported (file name without directory)
  print(paste0("QC results for sample ", basename(zip.path)))

  cat("\n")

  # the "Basic statistics" view is rendered as a table
  print(kable(qc.views[[1]]))

  cat("\n")

  # the two remaining views are drawn side by side
  grid.arrange(qc.views[[2]], qc.views[[3]], ncol = 2)
}

[1] “QC results for sample D_hypoleucum_1271_fastqc.zip”

Measure Value
Filename D_hypoleucum_1271.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2562068
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_1273_fastqc.zip”

Measure Value
Filename D_hypoleucum_1273.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1078823
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_1275_fastqc.zip”

Measure Value
Filename D_hypoleucum_1275.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 10536603
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_14037_fastqc.zip”

Measure Value
Filename D_hypoleucum_14037.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2022496
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_14061_fastqc.zip”

Measure Value
Filename D_hypoleucum_14061.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4537
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_14065_fastqc.zip”

Measure Value
Filename D_hypoleucum_14065.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2484741
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_14075_fastqc.zip”

Measure Value
Filename D_hypoleucum_14075.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3086972
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_14079_fastqc.zip”

Measure Value
Filename D_hypoleucum_14079.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 429074
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_14120_fastqc.zip”

Measure Value
Filename D_hypoleucum_14120.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 181181
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_14146_fastqc.zip”

Measure Value
Filename D_hypoleucum_14146.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 121589
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_14181_fastqc.zip”

Measure Value
Filename D_hypoleucum_14181.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2708992
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_14225_fastqc.zip”

Measure Value
Filename D_hypoleucum_14225.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 27815
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_17969_fastqc.zip”

Measure Value
Filename D_hypoleucum_17969.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3739967
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_17976_fastqc.zip”

Measure Value
Filename D_hypoleucum_17976.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 128066
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_18070_fastqc.zip”

Measure Value
Filename D_hypoleucum_18070.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 662481
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_18159_fastqc.zip”

Measure Value
Filename D_hypoleucum_18159.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2012398
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_18188_fastqc.zip”

Measure Value
Filename D_hypoleucum_18188.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 34854
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_18191_fastqc.zip”

Measure Value
Filename D_hypoleucum_18191.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1755921
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_18193_fastqc.zip”

Measure Value
Filename D_hypoleucum_18193.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2319001
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_19046_fastqc.zip”

Measure Value
Filename D_hypoleucum_19046.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4448354
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_19066_fastqc.zip”

Measure Value
Filename D_hypoleucum_19066.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 109819
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample D_hypoleucum_19136_fastqc.zip”

Measure Value
Filename D_hypoleucum_19136.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3442294
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_19177_fastqc.zip”

Measure Value
Filename D_hypoleucum_19177.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 786487
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_19178_fastqc.zip”

Measure Value
Filename D_hypoleucum_19178.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1164260
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_1956_fastqc.zip”

Measure Value
Filename D_hypoleucum_1956.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1126872
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_19638_fastqc.zip”

Measure Value
Filename D_hypoleucum_19638.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 575513
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_20193_fastqc.zip”

Measure Value
Filename D_hypoleucum_20193.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 149995
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_20213_fastqc.zip”

Measure Value
Filename D_hypoleucum_20213.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 263396
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_20214_fastqc.zip”

Measure Value
Filename D_hypoleucum_20214.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 5621
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_20218_fastqc.zip”

Measure Value
Filename D_hypoleucum_20218.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3868526
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_2067_fastqc.zip”

Measure Value
Filename D_hypoleucum_2067.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1047026
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_20921_fastqc.zip”

Measure Value
Filename D_hypoleucum_20921.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1292366
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_2208_fastqc.zip”

Measure Value
Filename D_hypoleucum_2208.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1084240
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_2229_fastqc.zip”

Measure Value
Filename D_hypoleucum_2229.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 247956
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_2253_fastqc.zip”

Measure Value
Filename D_hypoleucum_2253.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 437506
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_25622_fastqc.zip”

Measure Value
Filename D_hypoleucum_25622.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 47472
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_25637_fastqc.zip”

Measure Value
Filename D_hypoleucum_25637.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1805728
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_25670_fastqc.zip”

Measure Value
Filename D_hypoleucum_25670.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 291600
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_25672_fastqc.zip”

Measure Value
Filename D_hypoleucum_25672.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 165351
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_25675_fastqc.zip”

Measure Value
Filename D_hypoleucum_25675.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 173293
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_25868_fastqc.zip”

Measure Value
Filename D_hypoleucum_25868.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 887902
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_25880_fastqc.zip”

Measure Value
Filename D_hypoleucum_25880.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4183158
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_25921_fastqc.zip”

Measure Value
Filename D_hypoleucum_25921.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3534392
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_26975_fastqc.zip”

Measure Value
Filename D_hypoleucum_26975.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 366479
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_26984_fastqc.zip”

Measure Value
Filename D_hypoleucum_26984.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1512918
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_27182_fastqc.zip”

Measure Value
Filename D_hypoleucum_27182.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 67266
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_27450_fastqc.zip”

Measure Value
Filename D_hypoleucum_27450.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 890749
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_27454_fastqc.zip”

Measure Value
Filename D_hypoleucum_27454.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 26782
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_27468_fastqc.zip”

Measure Value
Filename D_hypoleucum_27468.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 28673
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_27471_fastqc.zip”

Measure Value
Filename D_hypoleucum_27471.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 50538
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_28273_fastqc.zip”

Measure Value
Filename D_hypoleucum_28273.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 12893
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_28294_fastqc.zip”

Measure Value
Filename D_hypoleucum_28294.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 7237647
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_28329_fastqc.zip”

Measure Value
Filename D_hypoleucum_28329.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 728838
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_28361_fastqc.zip”

Measure Value
Filename D_hypoleucum_28361.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 6004872
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_28376_fastqc.zip”

Measure Value
Filename D_hypoleucum_28376.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4319704
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_28416_fastqc.zip”

Measure Value
Filename D_hypoleucum_28416.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2567648
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_28584_fastqc.zip”

Measure Value
Filename D_hypoleucum_28584.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 6512
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample D_hypoleucum_28588_fastqc.zip”

Measure Value
Filename D_hypoleucum_28588.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 33935
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_28596_fastqc.zip”

Measure Value
Filename D_hypoleucum_28596.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 26278
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_28599_fastqc.zip”

Measure Value
Filename D_hypoleucum_28599.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 32074
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_28663_fastqc.zip”

Measure Value
Filename D_hypoleucum_28663.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 5416453
Sequences flagged as poor quality 0
Sequence length 95
%GC 39

[1] “QC results for sample D_hypoleucum_28676_fastqc.zip”

Measure Value
Filename D_hypoleucum_28676.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2472283
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_29945_fastqc.zip”

Measure Value
Filename D_hypoleucum_29945.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1487775
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_29951_fastqc.zip”

Measure Value
Filename D_hypoleucum_29951.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3440054
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_3095_fastqc.zip”

Measure Value
Filename D_hypoleucum_3095.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 579001
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_3158_fastqc.zip”

Measure Value
Filename D_hypoleucum_3158.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1092329
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_31636_fastqc.zip”

Measure Value
Filename D_hypoleucum_31636.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1612710
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_31644_fastqc.zip”

Measure Value
Filename D_hypoleucum_31644.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 785653
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_3208_fastqc.zip”

Measure Value
Filename D_hypoleucum_3208.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1499320
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_3274_fastqc.zip”

Measure Value
Filename D_hypoleucum_3274.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4454397
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_3275_fastqc.zip”

Measure Value
Filename D_hypoleucum_3275.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 7125
Sequences flagged as poor quality 0
Sequence length 95
%GC 35

[1] “QC results for sample D_hypoleucum_3314_fastqc.zip”

Measure Value
Filename D_hypoleucum_3314.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 922902
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_hypoleucum_357608_fastqc.zip”

Measure Value
Filename D_hypoleucum_357608.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 159823
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_357611_fastqc.zip”

Measure Value
Filename D_hypoleucum_357611.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 785
Sequences flagged as poor quality 0
Sequence length 95
%GC 39

[1] “QC results for sample D_hypoleucum_357612_fastqc.zip”

Measure Value
Filename D_hypoleucum_357612.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 150533
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_357614_fastqc.zip”

Measure Value
Filename D_hypoleucum_357614.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 21079
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_357615_fastqc.zip”

Measure Value
Filename D_hypoleucum_357615.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1541338
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_454950_fastqc.zip”

Measure Value
Filename D_hypoleucum_454950.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 974035
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_462070_fastqc.zip”

Measure Value
Filename D_hypoleucum_462070.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2014875
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_hypoleucum_472816_fastqc.zip”

Measure Value
Filename D_hypoleucum_472816.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3994
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample D_hypoleucum_FMNH454949_fastqc.zip”

Measure Value
Filename D_hypoleucum_FMNH454949.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 263252
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample D_nigrilore_KU28413_fastqc.zip”

Measure Value
Filename D_nigrilore_KU28413.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 696250
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

[1] “QC results for sample D_nigrilore_KU28414_fastqc.zip”

Measure Value
Filename D_nigrilore_KU28414.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2225205
Sequences flagged as poor quality 0
Sequence length 95
%GC 38

# Aggregate the reports by pointing qc_aggregate() at the folder that holds the
# output of fastqc(). FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
qc <- qc_aggregate(qc.dir, progressbar = FALSE)

# Per-sample statistics, rendered as a table
knitr::kable(qc_stats(qc))
sample pct.dup pct.gc tot.seq seq.length
D_hypoleucum_1271.fq.gz 96.84 37 2562068 95
D_hypoleucum_1273.fq.gz 95.72 38 1078823 95
D_hypoleucum_1275.fq.gz 97.85 37 10536603 95
D_hypoleucum_14037.fq.gz 96.27 37 2022496 95
D_hypoleucum_14061.fq.gz 53.36 37 4537 95
D_hypoleucum_14065.fq.gz 96.78 37 2484741 95
D_hypoleucum_14075.fq.gz 96.95 37 3086972 95
D_hypoleucum_14079.fq.gz 92.98 37 429074 95
D_hypoleucum_14120.fq.gz 89.20 37 181181 95
D_hypoleucum_14146.fq.gz 87.90 37 121589 95
D_hypoleucum_14181.fq.gz 96.44 38 2708992 95
D_hypoleucum_14225.fq.gz 74.61 37 27815 95
D_hypoleucum_17969.fq.gz 97.20 37 3739967 95
D_hypoleucum_17976.fq.gz 88.83 37 128066 95
D_hypoleucum_18070.fq.gz 94.38 37 662481 95
D_hypoleucum_18159.fq.gz 96.98 37 2012398 95
D_hypoleucum_18188.fq.gz 79.26 38 34854 95
D_hypoleucum_18191.fq.gz 96.92 37 1755921 95
D_hypoleucum_18193.fq.gz 96.87 38 2319001 95
D_hypoleucum_19046.fq.gz 97.12 38 4448354 95
D_hypoleucum_19066.fq.gz 87.88 36 109819 95
D_hypoleucum_19136.fq.gz 97.33 38 3442294 95
D_hypoleucum_19177.fq.gz 95.29 37 786487 95
D_hypoleucum_19178.fq.gz 96.26 37 1164260 95
D_hypoleucum_1956.fq.gz 95.40 38 1126872 95
D_hypoleucum_19638.fq.gz 94.78 37 575513 95
D_hypoleucum_20193.fq.gz 87.89 38 149995 95
D_hypoleucum_20213.fq.gz 92.61 38 263396 95
D_hypoleucum_20214.fq.gz 70.36 38 5621 95
D_hypoleucum_20218.fq.gz 97.54 37 3868526 95
D_hypoleucum_2067.fq.gz 95.20 38 1047026 95
D_hypoleucum_20921.fq.gz 95.50 37 1292366 95
D_hypoleucum_2208.fq.gz 95.43 37 1084240 95
D_hypoleucum_2229.fq.gz 83.50 38 247956 95
D_hypoleucum_2253.fq.gz 93.87 37 437506 95
D_hypoleucum_25622.fq.gz 82.51 37 47472 95
D_hypoleucum_25637.fq.gz 95.75 37 1805728 95
D_hypoleucum_25670.fq.gz 91.11 38 291600 95
D_hypoleucum_25672.fq.gz 87.82 38 165351 95
D_hypoleucum_25675.fq.gz 90.30 37 173293 95
D_hypoleucum_25868.fq.gz 94.59 37 887902 95
D_hypoleucum_25880.fq.gz 97.45 37 4183158 95
D_hypoleucum_25921.fq.gz 97.01 37 3534392 95
D_hypoleucum_26975.fq.gz 91.13 38 366479 95
D_hypoleucum_26984.fq.gz 96.09 37 1512918 95
D_hypoleucum_27182.fq.gz 84.92 37 67266 95
D_hypoleucum_27450.fq.gz 94.08 37 890749 95
D_hypoleucum_27454.fq.gz 73.99 37 26782 95
D_hypoleucum_27468.fq.gz 65.15 37 28673 95
D_hypoleucum_27471.fq.gz 79.26 38 50538 95
D_hypoleucum_28273.fq.gz 68.49 37 12893 95
D_hypoleucum_28294.fq.gz 97.61 38 7237647 95
D_hypoleucum_28329.fq.gz 94.23 38 728838 95
D_hypoleucum_28361.fq.gz 97.55 38 6004872 95
D_hypoleucum_28376.fq.gz 97.21 37 4319704 95
D_hypoleucum_28416.fq.gz 96.84 38 2567648 95
D_hypoleucum_28584.fq.gz 77.21 36 6512 95
D_hypoleucum_28588.fq.gz 75.69 38 33935 95
D_hypoleucum_28596.fq.gz 75.04 37 26278 95
D_hypoleucum_28599.fq.gz 69.27 37 32074 95
D_hypoleucum_28663.fq.gz 97.71 39 5416453 95
D_hypoleucum_28676.fq.gz 96.87 38 2472283 95
D_hypoleucum_29945.fq.gz 95.94 38 1487775 95
D_hypoleucum_29951.fq.gz 97.37 38 3440054 95
D_hypoleucum_3095.fq.gz 94.59 37 579001 95
D_hypoleucum_3158.fq.gz 96.00 37 1092329 95
D_hypoleucum_31636.fq.gz 95.67 38 1612710 95
D_hypoleucum_31644.fq.gz 94.85 38 785653 95
D_hypoleucum_3208.fq.gz 96.53 38 1499320 95
D_hypoleucum_3274.fq.gz 97.49 37 4454397 95
D_hypoleucum_3275.fq.gz 79.69 35 7125 95
D_hypoleucum_3314.fq.gz 95.93 38 922902 95
D_hypoleucum_357608.fq.gz 88.82 37 159823 95
D_hypoleucum_357611.fq.gz 77.32 39 785 95
D_hypoleucum_357612.fq.gz 90.21 37 150533 95
D_hypoleucum_357614.fq.gz 68.76 37 21079 95
D_hypoleucum_357615.fq.gz 96.66 37 1541338 95
D_hypoleucum_454950.fq.gz 95.81 37 974035 95
D_hypoleucum_462070.fq.gz 96.78 37 2014875 95
D_hypoleucum_472816.fq.gz 76.74 36 3994 95
D_hypoleucum_FMNH454949.fq.gz 89.17 37 263252 95
D_nigrilore_KU28413.fq.gz 94.49 38 696250 95
D_nigrilore_KU28414.fq.gz 96.26 38 2225205 95

solid red line = median sample value

dashed red line = 10% of median sample value

# Save the per-sample stats as an object for the plots and tables below
stats.info <- qc_stats(qc)
# Make tot.seq numeric so it can be binned and summarised
stats.info$tot.seq <- as.numeric(stats.info$tot.seq)

# Histogram of the number of sequencing reads per sample.
#   stats: data frame with a numeric `tot.seq` column (one row per sample)
#   bins:  number of histogram bins
# Returns a ggplot object in which
#   solid red line  = median sample value
#   dashed red line = 10% of median sample value
read_depth_histogram <- function(stats, bins) {
  # compute the median once; passing it as a constant draws a single line
  # rather than one overlapping line per row of `stats`
  med.reads <- median(stats$tot.seq)

  ggplot(stats, aes(x = tot.seq)) +
    geom_histogram(color = "black", fill = "white", bins = bins) +
    geom_vline(xintercept = med.reads, color = "red") +
    geom_vline(xintercept = med.reads * 0.1, color = "red",
               linetype = "dashed") +
    theme_classic() +
    xlab("Number of sequencing reads")
}

# Coarse view of the read-count distribution (20 bins)
read_depth_histogram(stats.info, bins = 20)

# Fine-grained view of the same distribution (200 bins)
read_depth_histogram(stats.info, bins = 200)

# Report the median read count and the cutoff at 10% of that median; samples
# with fewer reads than the cutoff should likely be dropped right away
median.reads <- median(stats.info$tot.seq)
print(paste(
  "Median sample contains", median.reads,
  "reads. The following samples contain less than", median.reads * .1,
  "reads (10% of the median), and should likely be dropped"
))

[1] “Median sample contains 887902 reads. The following samples contain less than 88790.2 reads (10% of the median), and should likely be dropped”

# Table of the samples whose read count is below 10% of the median read count
read.cutoff <- median(stats.info$tot.seq) * .1
is.low <- stats.info$tot.seq < read.cutoff
knitr::kable(stats.info[is.low, ])
sample pct.dup pct.gc tot.seq seq.length
D_hypoleucum_14061.fq.gz 53.36 37 4537 95
D_hypoleucum_14225.fq.gz 74.61 37 27815 95
D_hypoleucum_18188.fq.gz 79.26 38 34854 95
D_hypoleucum_20214.fq.gz 70.36 38 5621 95
D_hypoleucum_25622.fq.gz 82.51 37 47472 95
D_hypoleucum_27182.fq.gz 84.92 37 67266 95
D_hypoleucum_27454.fq.gz 73.99 37 26782 95
D_hypoleucum_27468.fq.gz 65.15 37 28673 95
D_hypoleucum_27471.fq.gz 79.26 38 50538 95
D_hypoleucum_28273.fq.gz 68.49 37 12893 95
D_hypoleucum_28584.fq.gz 77.21 36 6512 95
D_hypoleucum_28588.fq.gz 75.69 38 33935 95
D_hypoleucum_28596.fq.gz 75.04 37 26278 95
D_hypoleucum_28599.fq.gz 69.27 37 32074 95
D_hypoleucum_3275.fq.gz 79.69 35 7125 95
D_hypoleucum_357611.fq.gz 77.32 39 785 95
D_hypoleucum_357614.fq.gz 68.76 37 21079 95
D_hypoleucum_472816.fq.gz 76.74 36 3994 95